// Trade-Policy Dynamics: Evidence from 60 Years of U.S.-China Trade
// Alessandria,  Khan, Khederlarian, Ruhl, and Steinberg

// inputs:	imports_sitc2_74-18.dta
//			Elasticities_Soderbery2018_1.dta
// 		   	HS2022toSITC2ConversionAndCorrelationTables.xlsx
//			sitc2isic.dta

// outputs: dataset_sitc.dta

// packages needed: unique


clear all 
set varabbrev off

timer on 1
global dir_raw "../01 Raw data/"
global dir_int "../20 Intermediate files/"

use "${dir_int}imports_sitc2_74-18.dta", clear

////////////////////////////////////////////////////////////////////////////////
// Clean up some country names, drop unkown countries, aggregate some countries.
{
	replace cty = lower(cty)
	replace cty="china" if cty=="china (mainland)"
	replace cty="germany" if cty=="east germany" 
	replace cty="ussr" if cty=="soviet union" 
	replace cty="jordan" if cty=="jordon" 
	replace cty="israel" if cty=="i-p" 
	replace cty="cambod" if cty=="cambodia" 
	replace cty="korea_n" if cty=="north korea" 
	replace cty="austral" if cty=="australia" 
	replace cty="korea_s" if cty=="korea, republic of" 
	replace cty="guatmala" if cty=="guatemala" 
	replace cty="singapr" if cty=="singapore" 
	replace cty="cos_rica" if cty=="costa rica" 
	replace cty="salvadr" if cty=="el salvador" 
	replace cty="hondura" if cty=="honduras" 
	replace cty="nicaraga" if cty=="nicaragua" 
	replace cty="dom_rep" if cty=="dominican republic" 
	replace cty="" if cty=="" 
	drop if cty=="unknown" | cty=="asia_nes" | cty==""

	// Merge new countries into old, clean up iso codes
	replace cty="ussr" if cty=="russia" | cty=="belarus" | cty=="uzbekist" | cty=="ukraine" | cty=="turkmeni" | cty=="tajikist" | cty=="moldova" | ///
			cty=="lithuani" | cty=="latvia" | cty=="kyrgyzst" | cty=="kazakhst" | cty=="estonia" | cty=="armenia" | cty=="georgia" | cty=="azerbaij"
	replace cty="czecho" if cty=="czechrep" | cty=="slovakia" 
	replace cty="german" if cty=="german_e" 
	replace iso3="DEU" if iso3=="DDR"
	replace iso3="ROU" if cty=="romania"
	replace iso3="COD" if iso3=="ZAR"
	rename iso3 cty_iso3

	// Aggregate to the new countries created above. 
	collapse (first) cty_iso3 (sum) v_jst vcif_jst duties_jst dvalue_jst charge_jst qty_jst count*_jst (mean) *ntr_mean* s_mean* tar_unwgt *ntr_med* s_med* phase, by(cty sitc2 year)
}


////////////////////////////////////////////////////////////////////////////////
// Create dummies and other controls, transformations, etc. 
{
	// Create sitc 1, 2, 3, 4 identifiers (string and numeric)
	egen id = group(cty sitc2)
	rename sitc2 sitc
	encode sitc, gen(sitcn)
	forv n=1/4 {
		gen sitc`n' = substr(sitc,1,`n')
		encode sitc`n', gen(sitc`n'n)
	}

	encode cty, gen(ctyn)

	// NTR dummy (Note: Yugoslavia and poland were exempted from NNTR treatment after 1962)
	gen ntr = cty!="ussr" & cty!="china" & cty!="romania" & cty!="hungary" & cty!="czecho" & cty!="cuba" & cty!="vietnam" & cty!="cambod" & cty!="albania" & cty!="bulgaria" & cty!="korea_n"

	// FTA dummy (Source: https://ustr.gov/trade-agreements/free-trade-agreements)
	gen fta= (cty=="israel" & year>1984) | (cty=="jordon" & year>2000) | (cty=="austral" & year>2004) | (cty=="singapr" & year>2003) ///
			 | (cty=="chile" & year>2003) | (cty=="bahrain" & year>2005) | (cty=="morocco" & year>2005) | (cty=="oman" & year>2005) | (cty=="peru" & year>2007) ///
			 | (cty=="cos_rica" & year>2008) | (cty=="salvadr" & year>2008) | (cty=="guatmala" & year>2008) | (cty=="hondura" & year>2008) ///
			 | (cty=="nicaraga" & year>2008) | (cty=="dom_rep" & year>2008) | (cty=="korea_s" & year>2009) ///
			 | (cty=="colombia" & year>2011) | (cty=="panama" & year>2011) | (cty=="japan" & year>2019) | (cty=="canada" & year>1987) | (cty=="mexico" & year>1993)

	// Other dummies
	gen chn = cty=="china"
	gen vnm = cty=="vietnam"
	gen postWTO = year>2000

	// Life cycle variables related to years a good has been exported by a country
	bysort cty sitc: egen tempmin = min(year)
	gen first_yr = year == tempmin
	gen second_yr = year == tempmin+1
	gen age = year-tempmin+1
	gen age2 = age^2
	bysort cty sitc: egen tempmax = max(year)
	gen last_yr = year == tempmax
	gen last2_yr = year == tempmax-1
	drop temp*

	// Independent and dependent variables
	rename s_med_01 gap_med
	rename s_mean_01 gap_avg
	rename nntr_med_01 nntr_med
	rename nntr_mean_01 nntr_avg
	rename s_med_99 gap_med_99
	rename s_mean_99 gap_avg_99
	rename s_med_94 gap_med_94
	rename s_mean_94 gap_avg_94
	rename *_mean* *_avg*
	gen lv_jst = log(v_jst)
	gen tariff_jst = duties_jst/v_jst
	gen shipping_jst = charge_jst/v_jst
	bysort sitc: egen applied_nntr = mean(tariff_jst/(chn==1 & year<1980))
	bysort sitc year: egen applied_ntr = mean(tariff_jst/(fta==0 & ntr==1))
	bysort sitc year: egen applied_chn = mean(tariff_jst/(chn==1))
	gen applied_gap = applied_nntr-applied_ntr
	replace applied_gap=0 if applied_gap<0
	foreach x in tariff_jst shipping_jst nntr_med gap_med gap_med_94 gap_med_99 applied_nntr applied_gap {
		gen l`x' = log(1+`x')
	}
	
	// Create the balanced sample identifier
	bysort cty sitc: egen tempmin = min(year)
	gen tempchn = tempmin<=1980 & chn==1
	bysort sitc: egen balanced_chn = max(tempchn)
	drop temp*
}

////////////////////////////////////////////////////////////////////////////////
// Add the Soderbery (2018) elasticities to the dataset. 
{
	// Process the elasticity data: drop outliers, aggregate to HS4
	preserve
		use "${dir_raw}Elasticities_Soderbery2018.dta", clear
		qui sum sigma, d
		loc bound = r(p99)
		di `bound'
		drop if sigma>1.5*`bound'
		bysort hs4: egen delast_S18_usachn = mean(sigma/(iiso=="USA" & eiso=="CHN"))
		bysort hs4: egen delast_S18_usa = median(sigma/(iiso=="USA"))
		bysort hs4: egen delast_S18_all = median(sigma)
		egen temptag=tag(hs4)
		drop if temptag==0
		keep hs4 delast*
		save "${dir_int}temporary_delast_S18.dta", replace

		// Concord the elasticities to SITC level
		import excel "${dir_raw}HS2022toSITC2ConversionAndCorrelationTables.xlsx", sheet("Correlation HS22-SITC2") firstrow clear
		g hs4=substr(HS2022, 1,4)
		rename SITCRev2 sitc
		egen temptag=tag(sitc hs4)
		bysort sitc: egen temp=sum(temptag)
		drop if temptag==0
		keep hs4 sitc temp
		sort sitc
		merge m:1 hs4 using "${dir_int}temporary_delast_S18.dta" 
		drop temp _merge
		foreach x in delast_S18_usachn delast_S18_usa delast_S18_all {
			bysort sitc: egen temp = median(`x')
			replace `x'=temp
			drop temp
		}
		egen temptag=tag(sitc)
		drop if temptag==0
		keep sitc delast*
		save "${dir_int}temporary_delast_S18.dta", replace
	restore

	// Add the elasticity data to the dataset
	merge m:1 sitc using "${dir_int}temporary_delast_S18.dta"
	erase "${dir_int}temporary_delast_S18.dta"
	drop if _merge==2
	drop _merge
}


////////////////////////////////////////////////////////////////////////////////
// Add ISIC coding. Create some industry aggregates.
{
	preserve
		use "${dir_raw}sitc2isic.dta", clear
		rename sitc2 sitc4
		save "${dir_int}temporary_isic.dta", replace
	restore

	merge m:1 sitc4 using "${dir_int}temporary_isic.dta"
	unique sitc if _merge==1
	drop if _merge==2
	drop _merge


	g sector = ""
	replace sector="Food, beverage and tobacco" if isic2==311 | isic2==312 | isic2==313 | isic2==314
	replace sector="Textile, clothing, leather and footwear manufacturing" if isic2==321 | isic2==322 | isic2==323 | isic2==324
	replace sector="Wood and straw products" if isic2==331 | isic2==332
	replace sector="Paper and printing products" if isic2==341 | isic2==342
	replace sector="Energy products and chemicals" if isic2==351 | isic2==352 | isic2==353 | isic2==354
	replace sector="Rubber and plastic products" if isic2==355 | isic2==356
	replace sector="Non-metallic mineral products" if isic2==361 | isic2==362 | isic2==369
	replace sector="Base metal manufacturing" if isic2==371 | isic2==372
	replace sector="Calendered metal manufacturing" if isic2==381
	replace sector="Other machinery and equipment manufacturing industry" if isic2==382
	replace sector="Computer, electronic and optical products" if isic2==385
	replace sector="Electrical equipment manufacturing" if isic2==383
	replace sector="Vehicle manufacturing" if isic2==384
	replace sector="Furniture and other manufacturing" if isic2==390 | isic2==332

	g sector_n=.
	replace sector_n=1 if isic2==311 | isic2==312 | isic2==313 | isic2==314
	replace sector_n=2 if isic2==321 | isic2==322 | isic2==323 | isic2==324
	replace sector_n=3 if isic2==331 | isic2==332
	replace sector_n=4 if isic2==341 | isic2==342
	replace sector_n=5 if isic2==351 | isic2==352 | isic2==353 | isic2==354
	replace sector_n=6 if isic2==355 | isic2==356
	replace sector_n=7 if isic2==361 | isic2==362 | isic2==369
	replace sector_n=8 if isic2==371 | isic2==372
	replace sector_n=9 if isic2==381
	replace sector_n=10 if isic2==382
	replace sector_n=11 if isic2==385
	replace sector_n=12 if isic2==383
	replace sector_n=13 if isic2==384
	replace sector_n=14 if isic2==390 | isic2==332

	replace sector="Non-Manufacturing" if sector==""
	replace sector_n=15 if sector_n==.
}

////////////////////////////////////////////////////////////////////////////////
// Save the dataset
sort cty sitc year 
order cty* sitc* id sector* year 
compress
save "${dir_int}dataset_sitc.dta", replace

timer off 1
timer list